BLAST-ing our genes to the Biomineralization Toolkit

On andromeda

Download protein file from genome

# Fetch the Pocillopora acuta HIv2 protein FASTA into the references directory.
cd /data/putnamlab/zdellaert/Pdam-TagSeq/references

wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz

# Decompress in place; this leaves Pocillopora_acuta_HIv2.genes.pep.faa and removes the .gz.
gunzip Pocillopora_acuta_HIv2.genes.pep.faa.gz

cd ..

# -p keeps this step safe to re-run when the blast directory already exists.
mkdir -p blast
cd blast

On personal computer

# Upload the Biomineralization Toolkit FASTA (the blastp query file) to the blast directory on andromeda.
scp \
  /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/data/Biomineralization_Toolkit_FScucchia/Biomineralization_Toolkit_FScucchia.fasta \
  zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/
Biomineralization_Toolkit_FScucchia.fasta

On andromeda

nano Biomineralization_blast.sh
#!/bin/bash
# SLURM batch job: build a protein BLAST database from the Pocillopora acuta
# protein FASTA, then blastp the Biomineralization Toolkit proteins against it.
# Outputs (written to the -D working directory below):
#   Biomineralization_blast_results.txt      pairwise report (-outfmt 0)
#   Biomineralization_blast_results_tab.txt  tabular report (-outfmt 6), one target per query
#SBATCH --job-name="Pacuta_TRP_blast"
#SBATCH -t 240:00:00
#SBATCH --export=NONE
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH --mail-user=zdellaert@uri.edu #your email to send notifications
#SBATCH --mem=100GB
#SBATCH --error="blast_out_error"
#SBATCH --output="blast_out"
#SBATCH --account=putnamlab
#SBATCH -D /data/putnamlab/zdellaert/Pdam-TagSeq/blast/
#SBATCH --nodes=1 --ntasks-per-node=20

module load BLAST+/2.9.0-iimpi-2019b

# Stop at the first failing command so blastp never runs against a missing or
# partially built database.
set -e

# blastp is single-threaded unless -num_threads is given; use the tasks this
# job reserved on the node (falls back to 1 when run outside SLURM).
THREADS="${SLURM_NTASKS_PER_NODE:-1}"

# Build the protein database (files are prefixed Pacuta_prot in the working directory).
makeblastdb -in ../references/Pocillopora_acuta_HIv2.genes.pep.faa -out Pacuta_prot -dbtype prot

# Full pairwise report, for reading alignments by eye.
blastp -query Biomineralization_Toolkit_FScucchia.fasta -db Pacuta_prot -out Biomineralization_blast_results.txt -outfmt 0 -num_threads "$THREADS"

# Tabular report used by the downstream R step, which treats column 2 as the
# single best P. acuta hit for each query. BLAST warns that examining 5 or more
# matches is recommended when -max_target_seqs is this low; it is kept at 1 so
# the output stays one target per query.
blastp -query Biomineralization_Toolkit_FScucchia.fasta -db Pacuta_prot -out Biomineralization_blast_results_tab.txt -outfmt 6 -max_target_seqs 1 -num_threads "$THREADS"
sbatch Biomineralization_blast.sh

Warnings (not fatal; the job still completed and wrote both result files):

Warning: [blastp] Examining 5 or more matches is recommended

FASTA-Reader: Ignoring invalid residues at position(s): On line 2741: 378, 383, 386-390, 401, 417, 420-422, 431, 437-439, 443, 459-461

Warning: [blastp] Query_168 Gene: g13552, N.. : One or more O characters replaced by X for alignment score calculations at positions 382, 390, 392, 422

On personal computer:

# Copy both BLAST result files (pairwise report and tabular report) from andromeda to the local output directory.
scp \
  zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/Biomineralization_blast_results.txt \
  zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/Biomineralization_blast_results_tab.txt \
  /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/output

Next, take the best Pacuta alignment for each Biomineralization Toolkit gene, attach it to that gene's name and annotation, and format the resulting data frame so it can be matched against differentially expressed/frontloaded genes or modules.

sessionInfo() #provides list of loaded packages and version of R. 
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.33     R6_2.5.1          fastmap_1.1.1     xfun_0.39        
##  [5] cachem_1.0.8      knitr_1.42        htmltools_0.5.5   rmarkdown_2.21   
##  [9] cli_3.6.1         sass_0.4.6        jquerylib_0.1.4   compiler_4.3.0   
## [13] rstudioapi_0.15.0 tools_4.3.0       evaluate_0.21     bslib_0.4.2      
## [17] yaml_2.3.7        rlang_1.1.1       jsonlite_1.8.7
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
# Read the Biomineralization Toolkit annotation spreadsheet and drop the
# Stylophora BLAST column, which is not used below.
Biomin_genes <- read_excel("~/Documents/URI/Heron-Pdam-gene-expression/BioInf/data/Biomineralization_Toolkit_FScucchia/Biomineralization_Toolkit_FScucchia.xlsx")

Biomin_genes <- Biomin_genes %>% select(-`blasted protein in Stylophora`)

# Tabular blastp output (-outfmt 6, no header row). With the default column
# layout, V1 = query ID, V2 = subject (P. acuta protein) ID, V12 = bit score.
Biomin_blast_results <- read.delim("~/Documents/URI/Heron-Pdam-gene-expression/BioInf/output/Biomineralization_blast_results_tab.txt", header=FALSE) 

# Keep exactly one P. acuta hit per query: the row with the highest bit score.
# This makes "best hit" explicit rather than relying on the file holding a
# single subject per query (a query can have several alignment rows, and would
# have several subjects if BLAST were re-run with -max_target_seqs > 1).
Biomin_blast_results <- Biomin_blast_results %>%
  group_by(V1) %>%
  slice_max(V12, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(V1, V2)

# Merge on accessionnumber/geneID. Toolkit entries whose accession has no row
# in the BLAST table are dropped by the inner join.
merged_data <- Biomin_genes %>%
  inner_join(Biomin_blast_results, by = c("accessionnumber/geneID" = "V1")) %>%
  rename("Pocillopora_acuta_best_hit" = "V2")

write.csv(merged_data, "~/Documents/URI/Heron-Pdam-gene-expression/BioInf/output/Biomin_blast_Pocillopora_acuta_best_hit.csv", row.names = FALSE)

How many of the 9011 genes in our dataset are the best Pacuta hit for at least one Biomineralization Toolkit gene?

# Load the per-gene results table, then drop its 'X' column
# (presumably the row-index column written by write.csv -- TODO confirm).
DEGs <- read.csv(file="../../../output/Slope_Base/signif_genes_normcts.csv", sep=',', header=TRUE)
DEGs <- dplyr::select(DEGs, !c('X'))

# NOTE: despite its name, this table is not limited to differentially expressed genes. It holds every gene in our dataset, together with the p-value and fold-change information used to decide which genes are significant DEGs under our glmmSeq model.

# Use the gene ID as the row name.
rownames(DEGs) <- DEGs[["Gene"]]

dim(DEGs)
## [1] 9011   75

# Keep only genes that are the best P. acuta hit of a toolkit entry; a gene that
# is the best hit of several toolkit entries appears once per entry.
# This reassigns Biomin_genes, replacing the toolkit annotation table it held before.
Biomin_genes <- inner_join(DEGs, merged_data, by = c("Gene" = "Pocillopora_acuta_best_hit"))

# Print the toolkit definition of every matched row.
Biomin_genes[["definition"]]
##   [1] "Mucin4-like protein"                                                                            
##   [2] "Sushi domain-containing"                                                                        
##   [3] "Mucin-4 [Stylophora pistillata]"                                                                
##   [4] "mammalian ependymin-related protein 1-like [Stylophora pistillata]"                             
##   [5] "uncharacterized protein LOC111337489 [Stylophora pistillata]"                                   
##   [6] "Viral inclusion protein"                                                                        
##   [7] "Annotated: Actin"                                                                               
##   [8] "plasma membrane calcium ATPase [Stylophora pistillata]"                                         
##   [9] "Hephaestin-like protein"                                                                        
##  [10] "hephaestin-like protein [Stylophora pistillata]"                                                
##  [11] "Annotated: Vitellogenin"                                                                        
##  [12] "clone g15888 vitellogenin-like protein gene"                                                    
##  [13] "clone g1441 vitellogenin-like protein gene"                                                     
##  [14] "vitellogenin-like [Stylophora pistillata]"                                                      
##  [15] "Zona pellucida domain-containing protein"                                                       
##  [16] "Annotated: Zona Pellucida (ZP domain-containing)"                                               
##  [17] "Acropora millepora clone B26 hypothetical protein p251_4"                                       
##  [18] "Zona pellucida"                                                                                 
##  [19] "ZP domain-containing protein-like [Stylophora pistillata]"                                      
##  [20] "solute carrier family 4 member gamma [Stylophora pistillata]"                                   
##  [21] "Sacsin [Stylophora pistillata]"                                                                 
##  [22] "Complement C3 [Stylophora pistillata]"                                                          
##  [23] "uncharacterized protein LOC111323869 [Stylophora pistillata]"                                   
##  [24] "uncharacterized protein LOC111345150 [Stylophora pistillata]"                                   
##  [25] "Major yolk protein"                                                                             
##  [26] "major yolk protein-like isoform X2 [Stylophora pistillata]"                                     
##  [27] "SAARP3"                                                                                         
##  [28] "Acidic SOMP (Full-Length p27)"                                                                  
##  [29] "Acidic skeletal organic matrix protein (Acidic SOMP)"                                           
##  [30] "CARP1 [Stylophora pistillata]"                                                                  
##  [31] "Annotated: CARP1"                                                                               
##  [32] "Uncharacterized skeletal organic matrix protein-3  (USOMP-3)"                                   
##  [33] "Collagen alpha-1 chain"                                                                         
##  [34] "Annotated: Tolloid-Like"                                                                        
##  [35] "CUB domain-containing protein-like isoform X2 [Stylophora pistillata]"                          
##  [36] "Protocadherin-like"                                                                             
##  [37] "chymotrypsin-like elastase family member 1 [Stylophora pistillata]"                             
##  [38] "Cephalotoxin-like protein"                                                                      
##  [39] "microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]"           
##  [40] "microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]"           
##  [41] "sodium bicarbonate cotransporter 3-like isoform X2"                                             
##  [42] "Poly [ADP-ribose] polymerase 11 [Stylophora pistillata]"                                        
##  [43] "carbonic anhydrase [Stylophora pistillata]"                                                     
##  [44] "carbonic anhydrase 2"                                                                           
##  [45] "Annotated: Carbonic Anhydrase (STPCA2-1)"                                                       
##  [46] "Annotated: CarbonicAnhyrase"                                                                    
##  [47] "Annotated: N/A, named it CARP6-partial"                                                         
##  [48] "Annotated: USOMPS13"                                                                            
##  [49] "Stylophora pistillata clone g11702 hypothetical protein gene"                                   
##  [50] "Annotated: Kielin-Like"                                                                         
##  [51] "Kielin/chordin like"                                                                            
##  [52] "thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]"                              
##  [53] "Flagellar associated protein"                                                                   
##  [54] "protein lingerer-like [Stylophora pistillata]"                                                  
##  [55] "CUB and peptidase domain-containing protein 2-like [Stylophora pistillata]"                     
##  [56] "Protein FAM208A [Stylophora pistillata]"                                                        
##  [57] "spore wall protein 2-like isoform X3 [Stylophora pistillata]"                                   
##  [58] "L-type calcium channel alpha-1 subunit"                                                         
##  [59] "Annotated: Fibronectin"                                                                         
##  [60] "Annotated: Fibronectin (Fibronectin-2)"                                                         
##  [61] "Annotated: carbonic anhydrase (STPCA2-2)"                                                       
##  [62] "Stylophora pistillata clone g19762 hypothetical protein gene"                                   
##  [63] "CARP3 [Stylophora pistillata]"                                                                  
##  [64] "galaxin2"                                                                                       
##  [65] "galaxin"                                                                                        
##  [66] "Galaxin 2"                                                                                      
##  [67] "galaxin-like isoform X2 [Stylophora pistillata]"                                                
##  [68] "Annotated: Protoacadherin (PC4)"                                                                
##  [69] "Annotated: Protocadherin (PC2)"                                                                 
##  [70] "Annotated: Protocadherin (PC3)"                                                                 
##  [71] "Annotated: Protocadherin (PC3)"                                                                 
##  [72] "Annotated: Cadherin"                                                                            
##  [73] "Annotated: Protocadherin (PC1)"                                                                 
##  [74] "Annotated: Protoacadherin (PC4)"                                                                
##  [75] "Protocadherin fat-like"                                                                         
##  [76] "MAM and LDLr domain-containing protein"                                                         
##  [77] "MAM and LDLr domain-containing protein"                                                         
##  [78] "Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)"                                 
##  [79] "MAM and LDL-receptor domain- containing protein 2"                                              
##  [80] "MAM and LDL-receptor domain- containing protein 1"                                              
##  [81] "MAM domain anchor protein"                                                                      
##  [82] "MAM/LDL receptor domain containing protein"                                                     
##  [83] "Zonadhesion-like precursor"                                                                     
##  [84] "MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]"          
##  [85] "band 3 anion transport protein-like"                                                            
##  [86] "LOW QUALITY PROTEIN: uncharacterized protein LOC111321626 [Stylophora pistillata]"              
##  [87] "MAGUK p55 subfamily member 7-like [Stylophora pistillata]"                                      
##  [88] "uncharacterized protein LOC111344812 [Stylophora pistillata]"                                   
##  [89] "SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]"                         
##  [90] "Late embryogenesis protein"                                                                     
##  [91] "EGF and laminin G domain-containing protein"                                                    
##  [92] "EGF and laminin G domain-containing protein"                                                    
##  [93] "Laminin G domain-containing protein"                                                            
##  [94] "EGF and laminin G domain-containing protein"                                                    
##  [95] "Annotated: EGF and LamininG-Like (EGF LamG2)"                                                   
##  [96] "Annotated: EGF and LamininG-Like (EGF LamG1)"                                                   
##  [97] "EGF and laminin G domain-containing protein"                                                    
##  [98] "Contactin-associated protein"                                                                   
##  [99] "Neurexin"                                                                                       
## [100] "EGF and laminin G domain-containing protein-like [Stylophora pistillata]"                       
## [101] "Annotated: Protocadherin (PC5)"                                                                 
## [102] "Protocadherin"                                                                                  
## [103] "endothelin-converting enzyme 1-like isoform X2 [Stylophora pistillata]"                         
## [104] "PHD finger protein 21A-like [Stylophora pistillata]"                                            
## [105] "low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]"                
## [106] "Acropora yongei Na+/Ca2+ exchanger"                                                             
## [107] "TSP-1 and VWA domain-containing"                                                                
## [108] "Annotated: Thrombospondin-like protein (Thrombospondin)"                                        
## [109] "Annotated: Coadhesin"                                                                           
## [110] "clone g9951 alpha collagen-like protein gene"                                                   
## [111] "Thrombospondin"                                                                                 
## [112] "Hemicentin"                                                                                     
## [113] "coadhesin-like isoform X3 [Stylophora pistillata]"                                              
## [114] "Uncharacterized skeletal organic matrix protein-6 (USOMP6)"                                     
## [115] "Integrin - alpha"                                                                               
## [116] "hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]"                                
## [117] "von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]"
## [118] "collagenase 3-like [Stylophora pistillata]"                                                     
## [119] "digestive cysteine proteinase 1-like [Stylophora pistillata]"                                   
## [120] "Cystein-rich"                                                                                   
## [121] "Uncharacterized skeletal organic matrix protein-2  (USOMP-2)"                                   
## [122] "polycystic kidney disease 1-related (PKD1-related) protein"                                     
## [123] "polycystic kidney disease 1-related (PKD1-related) protein"                                     
## [124] "Adi-SAARP2"                                                                                     
## [125] "Skeletal acidic Asp-rich Protein 2 (SAARP2)"                                                    
## [126] "CARP9"                                                                                          
## [127] "skeletal aspartic acid-rich protein 2-like (CARP5)"
# Number of rows in the joined table (one per toolkit entry / gene match).
nrow(Biomin_genes)
## [1] 127
# Distinct genes from our dataset that have at least one toolkit match.
Biomin_genes_names <- unique(Biomin_genes[["Gene"]])

length(Biomin_genes_names)
## [1] 65
# Show the gene, toolkit accession, definition and Ref columns side by side.
select(Biomin_genes, Gene, `accessionnumber/geneID`, definition, Ref)
##                                           Gene
## 1    Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 2    Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 3    Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 4    Pocillopora_acuta_HIv2___RNAseq.g25351.t1
## 5     Pocillopora_acuta_HIv2___RNAseq.g7085.t1
## 6    Pocillopora_acuta_HIv2___RNAseq.g22851.t1
## 7    Pocillopora_acuta_HIv2___RNAseq.g14505.t1
## 8    Pocillopora_acuta_HIv2___RNAseq.g27976.t1
## 9    Pocillopora_acuta_HIv2___RNAseq.g27566.t1
## 10   Pocillopora_acuta_HIv2___RNAseq.g27566.t1
## 11      Pocillopora_acuta_HIv2___TS.g13222.t1b
## 12      Pocillopora_acuta_HIv2___TS.g13222.t1b
## 13      Pocillopora_acuta_HIv2___TS.g13222.t1b
## 14      Pocillopora_acuta_HIv2___TS.g13222.t1b
## 15        Pocillopora_acuta_HIv2___TS.g2710.t1
## 16        Pocillopora_acuta_HIv2___TS.g2710.t1
## 17        Pocillopora_acuta_HIv2___TS.g2710.t1
## 18        Pocillopora_acuta_HIv2___TS.g2710.t1
## 19        Pocillopora_acuta_HIv2___TS.g2710.t1
## 20   Pocillopora_acuta_HIv2___RNAseq.g15280.t1
## 21   Pocillopora_acuta_HIv2___RNAseq.g25214.t1
## 22    Pocillopora_acuta_HIv2___RNAseq.g8821.t1
## 23   Pocillopora_acuta_HIv2___RNAseq.g21232.t1
## 24   Pocillopora_acuta_HIv2___RNAseq.g20587.t2
## 25   Pocillopora_acuta_HIv2___RNAseq.g14653.t1
## 26   Pocillopora_acuta_HIv2___RNAseq.g14653.t1
## 27   Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 28   Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 29   Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 30   Pocillopora_acuta_HIv2___RNAseq.g16280.t1
## 31   Pocillopora_acuta_HIv2___RNAseq.g16280.t1
## 32      Pocillopora_acuta_HIv2___TS.g23724.t1a
## 33        Pocillopora_acuta_HIv2___TS.g1359.t1
## 34   Pocillopora_acuta_HIv2___RNAseq.g26037.t1
## 35   Pocillopora_acuta_HIv2___RNAseq.g26035.t1
## 36    Pocillopora_acuta_HIv2___RNAseq.g3235.t1
## 37   Pocillopora_acuta_HIv2___RNAseq.g19288.t1
## 38    Pocillopora_acuta_HIv2___RNAseq.g5013.t1
## 39       Pocillopora_acuta_HIv2___TS.g11659.t1
## 40       Pocillopora_acuta_HIv2___TS.g11659.t1
## 41    Pocillopora_acuta_HIv2___RNAseq.g7402.t1
## 42  Pocillopora_acuta_HIv2___RNAseq.g14663.t1a
## 43       Pocillopora_acuta_HIv2___TS.g12304.t1
## 44       Pocillopora_acuta_HIv2___TS.g12304.t1
## 45       Pocillopora_acuta_HIv2___TS.g12304.t1
## 46       Pocillopora_acuta_HIv2___TS.g12304.t1
## 47        Pocillopora_acuta_HIv2___TS.g5112.t1
## 48       Pocillopora_acuta_HIv2___TS.g26810.t1
## 49       Pocillopora_acuta_HIv2___TS.g26810.t1
## 50    Pocillopora_acuta_HIv2___RNAseq.g3780.t1
## 51    Pocillopora_acuta_HIv2___RNAseq.g3780.t1
## 52   Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 53   Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 54    Pocillopora_acuta_HIv2___RNAseq.g7908.t1
## 55   Pocillopora_acuta_HIv2___RNAseq.g21338.t1
## 56  Pocillopora_acuta_HIv2___RNAseq.g26846.t1a
## 57    Pocillopora_acuta_HIv2___RNAseq.g5807.t1
## 58   Pocillopora_acuta_HIv2___RNAseq.g21501.t1
## 59   Pocillopora_acuta_HIv2___RNAseq.g21517.t1
## 60   Pocillopora_acuta_HIv2___RNAseq.g21517.t1
## 61   Pocillopora_acuta_HIv2___RNAseq.g13824.t1
## 62         Pocillopora_acuta_HIv2___TS.g425.t1
## 63   Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 64   Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 65   Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 66   Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 67   Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 68        Pocillopora_acuta_HIv2___TS.g6583.t1
## 69        Pocillopora_acuta_HIv2___TS.g6583.t1
## 70        Pocillopora_acuta_HIv2___TS.g6583.t1
## 71        Pocillopora_acuta_HIv2___TS.g6583.t1
## 72        Pocillopora_acuta_HIv2___TS.g6583.t1
## 73        Pocillopora_acuta_HIv2___TS.g6583.t1
## 74        Pocillopora_acuta_HIv2___TS.g6583.t1
## 75        Pocillopora_acuta_HIv2___TS.g6583.t1
## 76   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 77   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 78   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 79   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 80   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 81   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 82   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 83   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 84   Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 85       Pocillopora_acuta_HIv2___TS.g27873.t1
## 86    Pocillopora_acuta_HIv2___RNAseq.g7668.t1
## 87   Pocillopora_acuta_HIv2___RNAseq.g15517.t1
## 88  Pocillopora_acuta_HIv2___RNAseq.g24861.t1b
## 89   Pocillopora_acuta_HIv2___RNAseq.g27376.t1
## 90   Pocillopora_acuta_HIv2___RNAseq.g16715.t1
## 91   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 92   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 93   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 94   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 95   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 96   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 97   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 98   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 99   Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 100  Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 101  Pocillopora_acuta_HIv2___RNAseq.g22388.t1
## 102  Pocillopora_acuta_HIv2___RNAseq.g22388.t1
## 103  Pocillopora_acuta_HIv2___RNAseq.g19211.t1
## 104   Pocillopora_acuta_HIv2___RNAseq.g1634.t1
## 105   Pocillopora_acuta_HIv2___RNAseq.g4085.t1
## 106  Pocillopora_acuta_HIv2___RNAseq.g24639.t1
## 107   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 108   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 109   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 110   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 111   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 112   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 113   Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 114      Pocillopora_acuta_HIv2___TS.g22622.t1
## 115      Pocillopora_acuta_HIv2___TS.g15792.t1
## 116      Pocillopora_acuta_HIv2___TS.g15792.t1
## 117  Pocillopora_acuta_HIv2___RNAseq.g28226.t2
## 118       Pocillopora_acuta_HIv2___TS.g5338.t1
## 119  Pocillopora_acuta_HIv2___RNAseq.g18103.t1
## 120      Pocillopora_acuta_HIv2___TS.g1545.t1b
## 121      Pocillopora_acuta_HIv2___TS.g1545.t1b
## 122  Pocillopora_acuta_HIv2___RNAseq.g16433.t1
## 123  Pocillopora_acuta_HIv2___RNAseq.g16433.t1
## 124  Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 125  Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 126  Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 127  Pocillopora_acuta_HIv2___RNAseq.g22261.t1
##                 accessionnumber/geneID
## 1                     aug_v2a.09809.t1
## 2                            P13_g6918
## 3                           PFX18785.1
## 4                       XP_022794351.1
## 5                       XP_022799541.1
## 6                             P4_g9861
## 7                           Gene:g9094
## 8                           AAR13013.1
## 9                     aug_v2a.24015.t1
## 10                      XP_022788227.1
## 11                      Gene:g15294.t1
## 12                          P24_g15888
## 13                           P26_g1441
## 14                      XP_022779720.1
## 15                    aug_v2a.07627.t1
## 16                           Gene:g907
## 17                          JN631095.1
## 18                          P21_g18277
## 19                      XP_022806326.1
## 20                          AJQ31790.1
## 21                          PFX13778.1
## 22                          PFX26597.1
## 23                      XP_022783044.1
## 24                      XP_022808163.1
## 25                            P8_g9654
## 26                      XP_022786918.1
## 27                    aug_v2a.06327.t1
## 28                         Gene:g13552
## 29                          JR972076.1
## 30                          AGE35225.2
## 31                          Gene:g1484
## 32                          JR997000.1
## 33                          JR991083.1
## 34                       Gene:g5735.t1
## 35                      XP_022799089.1
## 36                    aug_v2a.19518.t1
## 37                      XP_022788730.1
## 38                          JR986059.1
## 39                      XP_022809269.1
## 40                      XP_022809270.1
## 41                      XP_022801463.1
## 42                          PFX27832.1
## 43                          ACE95141.1
## 44                          EU532164.1
## 45                      Gene:g29033.t1
## 46                      Gene:g29034.t1
## 47                          Gene:g8396
## 48                      Gene:g30385.t1
## 49                          P16_g11702
## 50                         Gene:g39770
## 51                           P32_g5540
## 52                      XP_022804785.1
## 53                           P33_g8985
## 54                      XP_022806664.1
## 55                      XP_022780694.1
## 56                          PFX15740.1
## 57                      XP_022803872.1
## 58                          AAD11470.1
## 59                         Gene:g22569
## 60                         Gene:g37058
## 61                         Gene:g27814
## 62                          P22_g19762
## 63                          AGE35226.1
## 64                    aug_v2a.15065.t1
## 65                    aug_v2a.18631.t1
## 66                          JR976690.1
## 67                      XP_022794122.1
## 68                          AGG36361.1
## 69                          Gene:10186
## 70                         Gene:g10187
## 71                         Gene:g10188
## 72                          Gene:g2115
## 73                          Gene:g2116
## 74                            Gene:g30
## 75      P9_g10811;P1_g11108;P10_g11107
## 76                    aug_v2a.09968.t1
## 77                    aug_v2a.09969.t1
## 78                         Gene:g15955
## 79                          JR994474.1
## 80                          JT011118.1
## 81                           P20_g6066
## 82                           P34_g1714
## 83                          P36_g13890
## 84                      XP_022794736.1
## 85                      XP_022788270.1
## 86                      XP_022780303.1
## 87                      XP_022789932.1
## 88                      XP_022807807.1
## 89                      XP_022806928.1
## 90                          P28_g11651
## 91                    aug_v2a.06122.t1
## 92                    aug_v2a.06123.t1
## 93                    aug_v2a.15580.t1
## 94                    aug_v2a.24512.t1
## 95                         Gene:g34749
## 96                          Gene:g7086
## 97                          JR980881.1
## 98                          P19_g20041
## 99                          P31_g20420
## 100                     XP_022804012.1
## 101                        Gene:g24177
## 102                          P23_g1057
## 103                     XP_022789591.1
## 104                     XP_022790441.1
## 105                     XP_022798902.1
## 106                         MG182344.1
## 107                   aug_v2a.05945.t1
## 108                         Gene:g2829
## 109                      Gene:g2829.t1
## 110                          P14_g9951
## 111                          P3_g12510
## 112                          P5_g11674
## 113                     XP_022783415.1
## 114                         JR971508.1
## 115                         P27_g18472
## 116                         PFX30903.1
## 117                     XP_022810585.1
## 118                     XP_022783952.1
## 119                     XP_022803524.1
## 120                   aug_v2a.15064.t1
## 121                         JR982706.1
## 122                      aug_v2a.02830
## 123                   aug_v2a.02830.t1
## 124 aug_v2a.01440.t1(aug_v2a.01441.t1)
## 125                         JR991407.1
## 126                          P15_g1532
## 127                     XP_022780690.1
##                                                                                          definition
## 1                                                                               Mucin4-like protein
## 2                                                                           Sushi domain-containing
## 3                                                                   Mucin-4 [Stylophora pistillata]
## 4                                mammalian ependymin-related protein 1-like [Stylophora pistillata]
## 5                                      uncharacterized protein LOC111337489 [Stylophora pistillata]
## 6                                                                           Viral inclusion protein
## 7                                                                                  Annotated: Actin
## 8                                            plasma membrane calcium ATPase [Stylophora pistillata]
## 9                                                                           Hephaestin-like protein
## 10                                                  hephaestin-like protein [Stylophora pistillata]
## 11                                                                          Annotated: Vitellogenin
## 12                                                      clone g15888 vitellogenin-like protein gene
## 13                                                       clone g1441 vitellogenin-like protein gene
## 14                                                        vitellogenin-like [Stylophora pistillata]
## 15                                                         Zona pellucida domain-containing protein
## 16                                                 Annotated: Zona Pellucida (ZP domain-containing)
## 17                                         Acropora millepora clone B26 hypothetical protein p251_4
## 18                                                                                   Zona pellucida
## 19                                        ZP domain-containing protein-like [Stylophora pistillata]
## 20                                     solute carrier family 4 member gamma [Stylophora pistillata]
## 21                                                                   Sacsin [Stylophora pistillata]
## 22                                                            Complement C3 [Stylophora pistillata]
## 23                                     uncharacterized protein LOC111323869 [Stylophora pistillata]
## 24                                     uncharacterized protein LOC111345150 [Stylophora pistillata]
## 25                                                                               Major yolk protein
## 26                                       major yolk protein-like isoform X2 [Stylophora pistillata]
## 27                                                                                           SAARP3
## 28                                                                    Acidic SOMP (Full-Length p27)
## 29                                             Acidic skeletal organic matrix protein (Acidic SOMP)
## 30                                                                    CARP1 [Stylophora pistillata]
## 31                                                                                 Annotated: CARP1
## 32                                     Uncharacterized skeletal organic matrix protein-3  (USOMP-3)
## 33                                                                           Collagen alpha-1 chain
## 34                                                                          Annotated: Tolloid-Like
## 35                            CUB domain-containing protein-like isoform X2 [Stylophora pistillata]
## 36                                                                               Protocadherin-like
## 37                               chymotrypsin-like elastase family member 1 [Stylophora pistillata]
## 38                                                                        Cephalotoxin-like protein
## 39             microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]
## 40             microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]
## 41                                               sodium bicarbonate cotransporter 3-like isoform X2
## 42                                          Poly [ADP-ribose] polymerase 11 [Stylophora pistillata]
## 43                                                       carbonic anhydrase [Stylophora pistillata]
## 44                                                                             carbonic anhydrase 2
## 45                                                         Annotated: Carbonic Anhydrase (STPCA2-1)
## 46                                                                      Annotated: CarbonicAnhyrase
## 47                                                           Annotated: N/A, named it CARP6-partial
## 48                                                                              Annotated: USOMPS13
## 49                                     Stylophora pistillata clone g11702 hypothetical protein gene
## 50                                                                           Annotated: Kielin-Like
## 51                                                                              Kielin/chordin like
## 52                                thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 53                                                                     Flagellar associated protein
## 54                                                    protein lingerer-like [Stylophora pistillata]
## 55                       CUB and peptidase domain-containing protein 2-like [Stylophora pistillata]
## 56                                                          Protein FAM208A [Stylophora pistillata]
## 57                                     spore wall protein 2-like isoform X3 [Stylophora pistillata]
## 58                                                           L-type calcium channel alpha-1 subunit
## 59                                                                           Annotated: Fibronectin
## 60                                                           Annotated: Fibronectin (Fibronectin-2)
## 61                                                         Annotated: carbonic anhydrase (STPCA2-2)
## 62                                     Stylophora pistillata clone g19762 hypothetical protein gene
## 63                                                                    CARP3 [Stylophora pistillata]
## 64                                                                                         galaxin2
## 65                                                                                          galaxin
## 66                                                                                        Galaxin 2
## 67                                                  galaxin-like isoform X2 [Stylophora pistillata]
## 68                                                                  Annotated: Protoacadherin (PC4)
## 69                                                                   Annotated: Protocadherin (PC2)
## 70                                                                   Annotated: Protocadherin (PC3)
## 71                                                                   Annotated: Protocadherin (PC3)
## 72                                                                              Annotated: Cadherin
## 73                                                                   Annotated: Protocadherin (PC1)
## 74                                                                  Annotated: Protoacadherin (PC4)
## 75                                                                           Protocadherin fat-like
## 76                                                           MAM and LDLr domain-containing protein
## 77                                                           MAM and LDLr domain-containing protein
## 78                                   Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)
## 79                                                MAM and LDL-receptor domain- containing protein 2
## 80                                                MAM and LDL-receptor domain- containing protein 1
## 81                                                                        MAM domain anchor protein
## 82                                                       MAM/LDL receptor domain containing protein
## 83                                                                       Zonadhesion-like precursor
## 84            MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]
## 85                                                              band 3 anion transport protein-like
## 86                LOW QUALITY PROTEIN: uncharacterized protein LOC111321626 [Stylophora pistillata]
## 87                                        MAGUK p55 subfamily member 7-like [Stylophora pistillata]
## 88                                     uncharacterized protein LOC111344812 [Stylophora pistillata]
## 89                           SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]
## 90                                                                       Late embryogenesis protein
## 91                                                      EGF and laminin G domain-containing protein
## 92                                                      EGF and laminin G domain-containing protein
## 93                                                              Laminin G domain-containing protein
## 94                                                      EGF and laminin G domain-containing protein
## 95                                                     Annotated: EGF and LamininG-Like (EGF LamG2)
## 96                                                     Annotated: EGF and LamininG-Like (EGF LamG1)
## 97                                                      EGF and laminin G domain-containing protein
## 98                                                                     Contactin-associated protein
## 99                                                                                         Neurexin
## 100                        EGF and laminin G domain-containing protein-like [Stylophora pistillata]
## 101                                                                  Annotated: Protocadherin (PC5)
## 102                                                                                   Protocadherin
## 103                          endothelin-converting enzyme 1-like isoform X2 [Stylophora pistillata]
## 104                                             PHD finger protein 21A-like [Stylophora pistillata]
## 105                 low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]
## 106                                                              Acropora yongei Na+/Ca2+ exchanger
## 107                                                                 TSP-1 and VWA domain-containing
## 108                                         Annotated: Thrombospondin-like protein (Thrombospondin)
## 109                                                                            Annotated: Coadhesin
## 110                                                    clone g9951 alpha collagen-like protein gene
## 111                                                                                  Thrombospondin
## 112                                                                                      Hemicentin
## 113                                               coadhesin-like isoform X3 [Stylophora pistillata]
## 114                                      Uncharacterized skeletal organic matrix protein-6 (USOMP6)
## 115                                                                                Integrin - alpha
## 116                                 hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]
## 117 von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]
## 118                                                      collagenase 3-like [Stylophora pistillata]
## 119                                    digestive cysteine proteinase 1-like [Stylophora pistillata]
## 120                                                                                    Cystein-rich
## 121                                    Uncharacterized skeletal organic matrix protein-2  (USOMP-2)
## 122                                      polycystic kidney disease 1-related (PKD1-related) protein
## 123                                      polycystic kidney disease 1-related (PKD1-related) protein
## 124                                                                                      Adi-SAARP2
## 125                                                     Skeletal acidic Asp-rich Protein 2 (SAARP2)
## 126                                                                                           CARP9
## 127                                              skeletal aspartic acid-rich protein 2-like (CARP5)
##                           Ref
## 1       Takeuchi et al., 2016
## 2          Drake et al., 2013
## 3          Peled et al., 2020
## 4          Peled et al., 2020
## 5          Peled et al., 2020
## 6          Drake et al., 2013
## 7   Mummadisetti et al., 2021
## 8        Zoccola et al., 2004
## 9       Takeuchi et al., 2016
## 10         Peled et al., 2020
## 11  Mummadisetti et al., 2021
## 12         Drake et al., 2013
## 13         Drake et al., 2013
## 14         Peled et al., 2020
## 15      Takeuchi et al., 2016
## 16  Mummadisetti et al., 2021
## 17       Hayward et al., 2011
## 18         Drake et al., 2013
## 19         Peled et al., 2020
## 20       Zoccola et al., 2015
## 21         Peled et al., 2020
## 22         Peled et al., 2020
## 23         Peled et al., 2020
## 24         Peled et al., 2020
## 25         Drake et al., 2013
## 26         Peled et al., 2020
## 27      Takeuchi et al., 2016
## 28  Mummadisetti et al., 2021
## 29   Ramos-Silva et al., 2013
## 30          Mass et al., 2013
## 31  Mummadisetti et al., 2021
## 32   Ramos-Silva et al., 2013
## 33   Ramos-Silva et al., 2013
## 34  Mummadisetti et al., 2021
## 35         Peled et al., 2020
## 36      Takeuchi et al., 2016
## 37         Peled et al., 2020
## 38   Ramos-Silva et al., 2013
## 39         Peled et al., 2020
## 40         Peled et al., 2020
## 41       Zoccola et al., 2015
## 42         Peled et al., 2020
## 43          Moya et al., 2008
## 44      Bertucci et al., 2011
## 45  Mummadisetti et al., 2021
## 46  Mummadisetti et al., 2021
## 47  Mummadisetti et al., 2021
## 48  Mummadisetti et al., 2021
## 49         Drake et al., 2013
## 50  Mummadisetti et al., 2021
## 51         Drake et al., 2013
## 52         Peled et al., 2020
## 53         Drake et al., 2013
## 54         Peled et al., 2020
## 55         Peled et al., 2020
## 56         Peled et al., 2020
## 57         Peled et al., 2020
## 58       Zoccola et al., 1999
## 59  Mummadisetti et al., 2021
## 60  Mummadisetti et al., 2021
## 61  Mummadisetti et al., 2021
## 62         Drake et al., 2013
## 63          Mass et al., 2013
## 64      Takeuchi et al., 2016
## 65      Takeuchi et al., 2016
## 66   Ramos-Silva et al., 2013
## 67         Peled et al., 2020
## 68         Drake et al., 2013
## 69  Mummadisetti et al., 2021
## 70  Mummadisetti et al., 2021
## 71  Mummadisetti et al., 2021
## 72  Mummadisetti et al., 2021
## 73  Mummadisetti et al., 2021
## 74  Mummadisetti et al., 2021
## 75         Drake et al., 2013
## 76      Takeuchi et al., 2016
## 77      Takeuchi et al., 2016
## 78  Mummadisetti et al., 2021
## 79   Ramos-Silva et al., 2013
## 80   Ramos-Silva et al., 2013
## 81         Drake et al., 2013
## 82         Drake et al., 2013
## 83         Drake et al., 2013
## 84         Peled et al., 2020
## 85       Zoccola et al., 2015
## 86         Peled et al., 2020
## 87         Peled et al., 2020
## 88         Peled et al., 2020
## 89         Peled et al., 2020
## 90         Drake et al., 2013
## 91      Takeuchi et al., 2016
## 92      Takeuchi et al., 2016
## 93      Takeuchi et al., 2016
## 94      Takeuchi et al., 2016
## 95  Mummadisetti et al., 2021
## 96  Mummadisetti et al., 2021
## 97   Ramos-Silva et al., 2013
## 98         Drake et al., 2013
## 99         Drake et al., 2013
## 100        Peled et al., 2020
## 101 Mummadisetti et al., 2021
## 102        Drake et al., 2013
## 103        Peled et al., 2020
## 104        Peled et al., 2020
## 105        Peled et al., 2020
## 106       Barron et al., 2018
## 107     Takeuchi et al., 2016
## 108 Mummadisetti et al., 2021
## 109 Mummadisetti et al., 2021
## 110        Drake et al., 2013
## 111        Drake et al., 2013
## 112        Drake et al., 2013
## 113        Peled et al., 2020
## 114  Ramos-Silva et al., 2013
## 115        Drake et al., 2013
## 116        Peled et al., 2020
## 117        Peled et al., 2020
## 118        Peled et al., 2020
## 119        Peled et al., 2020
## 120     Takeuchi et al., 2016
## 121  Ramos-Silva et al., 2013
## 122     Takeuchi et al., 2016
## 123     Takeuchi et al., 2016
## 124     Takeuchi et al., 2016
## 125  Ramos-Silva et al., 2013
## 126        Drake et al., 2013
## 127        Peled et al., 2020
#Biomin_genes %>% select(Gene, `accessionnumber/geneID`, definition, Ref, Origin, Treatment, Treatment.Origin) %>% View()

127 of the 172 Biomineralization Genes have a best hit in our dataset of 9011 genes; together they match 65 of the 9011 genes (several Biomineralization Genes can share the same best hit).

Differentially expressed genes: are any of these Biomineralization genes?

# Subset the DEG table once per model term: keep the rows whose value in that
# term's column is below 0.05, then report how many rows remain.
Origin_DEGs <- dplyr::filter(DEGs, Origin < 0.05)

nrow(Origin_DEGs)
## [1] 840
Treatment_DEGs <- dplyr::filter(DEGs, Treatment < 0.05)

nrow(Treatment_DEGs)
## [1] 18
Interaction_DEGs <- dplyr::filter(DEGs, Treatment.Origin < 0.05)

nrow(Interaction_DEGs)
## [1] 30

Setting up to plot genes: loading the results from glmmSeq

library(glmmSeq)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.5.4
## Current Matrix version is 1.6.0
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## Warning in checkDepPackageVersion(dep_pkg = "TMB"): Package version inconsistency detected.
## glmmTMB was built with TMB version 1.9.3
## Current TMB version is 1.9.4
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
# Load in the RDS from the previous step / previous iteration
results <- readRDS("glmmSeq.rds")
# Run glmmQvals() on the loaded object; the call prints the per-term
# Significant / Not Significant tallies
results <- glmmQvals(results)
## 
## Treatment
## ---------
## Not Significant     Significant 
##            8993              18 
## 
## Origin
## ------
## Not Significant     Significant 
##            8171             840 
## 
## Treatment:Origin
## ----------------
## Not Significant     Significant 
##            8981              30
# Run the project script one directory up
# (NOTE(review): presumably this is what defines Factor_ggmodelPlot() — confirm)
source("../Factor_ggmodelPlot.R")

# Colour pairs passed to the plotting calls
plotColours <- c("skyblue", "mediumseagreen")
modColours <- c("dodgerblue3", "seagreen4")
# Keep the Origin DEGs that are the best hit of at least one entry in
# merged_data; a gene that is the best hit of several entries contributes
# one row per entry.
Biomin_Origin_DEGs <- inner_join(
  Origin_DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)

Biomin_Origin_DEGs[["definition"]]
##  [1] "mammalian ependymin-related protein 1-like [Stylophora pistillata]"
##  [2] "Annotated: Vitellogenin"                                           
##  [3] "clone g15888 vitellogenin-like protein gene"                       
##  [4] "clone g1441 vitellogenin-like protein gene"                        
##  [5] "vitellogenin-like [Stylophora pistillata]"                         
##  [6] "uncharacterized protein LOC111323869 [Stylophora pistillata]"      
##  [7] "uncharacterized protein LOC111345150 [Stylophora pistillata]"      
##  [8] "Cephalotoxin-like protein"                                         
##  [9] "carbonic anhydrase [Stylophora pistillata]"                        
## [10] "carbonic anhydrase 2"                                              
## [11] "Annotated: Carbonic Anhydrase (STPCA2-1)"                          
## [12] "Annotated: CarbonicAnhyrase"                                       
## [13] "thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]" 
## [14] "protein lingerer-like [Stylophora pistillata]"                     
## [15] "Annotated: carbonic anhydrase (STPCA2-2)"                          
## [16] "Late embryogenesis protein"
# Number of joined rows, i.e. Biomineralization Genes whose best hit is an
# Origin DEG
length(Biomin_Origin_DEGs$definition)
## [1] 16
# Distinct Pocillopora gene IDs behind those rows (one gene can be the best
# hit of several Biomineralization Genes)
Biomin_Origin_DEG_names <- unique(Biomin_Origin_DEGs$Gene)

length(Biomin_Origin_DEG_names)
## [1] 10
# select() is namespace-qualified, like the other dplyr calls in this file,
# so the table still prints if another attached package masks select()
Biomin_Origin_DEGs %>%
  dplyr::select(Gene, `accessionnumber/geneID`, definition, Ref)
##                                         Gene accessionnumber/geneID
## 1  Pocillopora_acuta_HIv2___RNAseq.g25351.t1         XP_022794351.1
## 2     Pocillopora_acuta_HIv2___TS.g13222.t1b         Gene:g15294.t1
## 3     Pocillopora_acuta_HIv2___TS.g13222.t1b             P24_g15888
## 4     Pocillopora_acuta_HIv2___TS.g13222.t1b              P26_g1441
## 5     Pocillopora_acuta_HIv2___TS.g13222.t1b         XP_022779720.1
## 6  Pocillopora_acuta_HIv2___RNAseq.g21232.t1         XP_022783044.1
## 7  Pocillopora_acuta_HIv2___RNAseq.g20587.t2         XP_022808163.1
## 8   Pocillopora_acuta_HIv2___RNAseq.g5013.t1             JR986059.1
## 9      Pocillopora_acuta_HIv2___TS.g12304.t1             ACE95141.1
## 10     Pocillopora_acuta_HIv2___TS.g12304.t1             EU532164.1
## 11     Pocillopora_acuta_HIv2___TS.g12304.t1         Gene:g29033.t1
## 12     Pocillopora_acuta_HIv2___TS.g12304.t1         Gene:g29034.t1
## 13 Pocillopora_acuta_HIv2___RNAseq.g10093.t2         XP_022804785.1
## 14  Pocillopora_acuta_HIv2___RNAseq.g7908.t1         XP_022806664.1
## 15 Pocillopora_acuta_HIv2___RNAseq.g13824.t1            Gene:g27814
## 16 Pocillopora_acuta_HIv2___RNAseq.g16715.t1             P28_g11651
##                                                            definition
## 1  mammalian ependymin-related protein 1-like [Stylophora pistillata]
## 2                                             Annotated: Vitellogenin
## 3                         clone g15888 vitellogenin-like protein gene
## 4                          clone g1441 vitellogenin-like protein gene
## 5                           vitellogenin-like [Stylophora pistillata]
## 6        uncharacterized protein LOC111323869 [Stylophora pistillata]
## 7        uncharacterized protein LOC111345150 [Stylophora pistillata]
## 8                                           Cephalotoxin-like protein
## 9                          carbonic anhydrase [Stylophora pistillata]
## 10                                               carbonic anhydrase 2
## 11                           Annotated: Carbonic Anhydrase (STPCA2-1)
## 12                                        Annotated: CarbonicAnhyrase
## 13  thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 14                      protein lingerer-like [Stylophora pistillata]
## 15                           Annotated: carbonic anhydrase (STPCA2-2)
## 16                                         Late embryogenesis protein
##                          Ref
## 1         Peled et al., 2020
## 2  Mummadisetti et al., 2021
## 3         Drake et al., 2013
## 4         Drake et al., 2013
## 5         Peled et al., 2020
## 6         Peled et al., 2020
## 7         Peled et al., 2020
## 8   Ramos-Silva et al., 2013
## 9          Moya et al., 2008
## 10     Bertucci et al., 2011
## 11 Mummadisetti et al., 2021
## 12 Mummadisetti et al., 2021
## 13        Peled et al., 2020
## 14        Peled et al., 2020
## 15 Mummadisetti et al., 2021
## 16        Drake et al., 2013

16/172 of the Biomineralization Genes are represented in the Origin DEGs. These correspond to 10 Pocillopora genes (some of the 10 are the best match for multiple Biomineralization Genes), out of the 65 Pocillopora genes that match Biomineralization Genes (10/65).

Pocillopora_acuta_HIv2___TS.g13222.t1b is a best match for:

- Gene:g15294.t1 Annotated: Vitellogenin
- P24_g15888 clone g15888 vitellogenin-like protein gene
- P26_g1441 clone g1441 vitellogenin-like protein gene
- XP_022779720.1 vitellogenin-like [Stylophora pistillata]

Pocillopora_acuta_HIv2___TS.g12304.t1 is a best match for:

- ACE95141.1 carbonic anhydrase [Stylophora pistillata]
- EU532164.1 carbonic anhydrase 2
- Gene:g29033.t1 Annotated: Carbonic Anhydrase (STPCA2-1)
- Gene:g29034.t1 Annotated: CarbonicAnhyrase

# Draw one model plot per Origin-DEG biomineralization gene, using the gene ID
# as the plot title. The explicit print() is needed because values are not
# auto-printed inside a for loop.
for (i in Biomin_Origin_DEG_names) {
  print(Factor_ggmodelPlot(
    results,
    geneName = i,
    x1var = "Treatment",
    x2var = "Origin",
    addBox = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
    xlab = "Treatment and Origin",
    title = i,
    colours = plotColours,
    lineColours = plotColours,
    modelColours = modColours,
    modelSize = 3
  ))
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Treatment DEGs that are the best hit of at least one entry in merged_data
Biomin_Treatment_DEGs <- inner_join(
  Treatment_DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)

Biomin_Treatment_DEGs[["definition"]]
## character(0)

0/172 of the Biomineralization Genes are represented in the Treatment DEGs

# Treatment:Origin interaction DEGs that are the best hit of at least one
# entry in merged_data
Biomin_Interaction_DEGs <- inner_join(
  Interaction_DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)

Biomin_Interaction_DEGs[["definition"]]
## character(0)

0/172 of the Biomineralization Genes are represented in the Interaction DEGs

Frontloaded genes!

# Read the frontloaded-gene table and drop its "X" column
# (NOTE(review): presumably the row-name column added when the CSV was
# written — confirm)
FRONTs <- read.csv(
  file = "../../../output/Slope_Base/frontloaded_genes.csv",
  sep = ",",
  header = TRUE
) %>%
  dplyr::select(!c("X"))

# Frontloaded genes that are the best hit of at least one entry in merged_data
Biomin_FRONTs <- inner_join(
  FRONTs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)

Biomin_FRONTs[["definition"]]
##  [1] "Mucin4-like protein"                                                                            
##  [2] "Sushi domain-containing"                                                                        
##  [3] "Mucin-4 [Stylophora pistillata]"                                                                
##  [4] "plasma membrane calcium ATPase [Stylophora pistillata]"                                         
##  [5] "Hephaestin-like protein"                                                                        
##  [6] "hephaestin-like protein [Stylophora pistillata]"                                                
##  [7] "solute carrier family 4 member gamma [Stylophora pistillata]"                                   
##  [8] "Complement C3 [Stylophora pistillata]"                                                          
##  [9] "Major yolk protein"                                                                             
## [10] "major yolk protein-like isoform X2 [Stylophora pistillata]"                                     
## [11] "CARP1 [Stylophora pistillata]"                                                                  
## [12] "Annotated: CARP1"                                                                               
## [13] "Uncharacterized skeletal organic matrix protein-3  (USOMP-3)"                                   
## [14] "Annotated: Tolloid-Like"                                                                        
## [15] "Cephalotoxin-like protein"                                                                      
## [16] "microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]"           
## [17] "microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]"           
## [18] "sodium bicarbonate cotransporter 3-like isoform X2"                                             
## [19] "carbonic anhydrase [Stylophora pistillata]"                                                     
## [20] "carbonic anhydrase 2"                                                                           
## [21] "Annotated: Carbonic Anhydrase (STPCA2-1)"                                                       
## [22] "Annotated: CarbonicAnhyrase"                                                                    
## [23] "spore wall protein 2-like isoform X3 [Stylophora pistillata]"                                   
## [24] "L-type calcium channel alpha-1 subunit"                                                         
## [25] "Annotated: carbonic anhydrase (STPCA2-2)"                                                       
## [26] "MAM and LDLr domain-containing protein"                                                         
## [27] "MAM and LDLr domain-containing protein"                                                         
## [28] "Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)"                                 
## [29] "MAM and LDL-receptor domain- containing protein 2"                                              
## [30] "MAM and LDL-receptor domain- containing protein 1"                                              
## [31] "MAM domain anchor protein"                                                                      
## [32] "MAM/LDL receptor domain containing protein"                                                     
## [33] "Zonadhesion-like precursor"                                                                     
## [34] "MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]"          
## [35] "MAGUK p55 subfamily member 7-like [Stylophora pistillata]"                                      
## [36] "uncharacterized protein LOC111344812 [Stylophora pistillata]"                                   
## [37] "SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]"                         
## [38] "Late embryogenesis protein"                                                                     
## [39] "EGF and laminin G domain-containing protein"                                                    
## [40] "EGF and laminin G domain-containing protein"                                                    
## [41] "Laminin G domain-containing protein"                                                            
## [42] "EGF and laminin G domain-containing protein"                                                    
## [43] "Annotated: EGF and LamininG-Like (EGF LamG2)"                                                   
## [44] "Annotated: EGF and LamininG-Like (EGF LamG1)"                                                   
## [45] "EGF and laminin G domain-containing protein"                                                    
## [46] "Contactin-associated protein"                                                                   
## [47] "Neurexin"                                                                                       
## [48] "EGF and laminin G domain-containing protein-like [Stylophora pistillata]"                       
## [49] "low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]"                
## [50] "TSP-1 and VWA domain-containing"                                                                
## [51] "Annotated: Thrombospondin-like protein (Thrombospondin)"                                        
## [52] "Annotated: Coadhesin"                                                                           
## [53] "clone g9951 alpha collagen-like protein gene"                                                   
## [54] "Thrombospondin"                                                                                 
## [55] "Hemicentin"                                                                                     
## [56] "coadhesin-like isoform X3 [Stylophora pistillata]"                                              
## [57] "Integrin - alpha"                                                                               
## [58] "hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]"                                
## [59] "von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]"
## [60] "digestive cysteine proteinase 1-like [Stylophora pistillata]"                                   
## [61] "Cystein-rich"                                                                                   
## [62] "Uncharacterized skeletal organic matrix protein-2  (USOMP-2)"                                   
## [63] "polycystic kidney disease 1-related (PKD1-related) protein"                                     
## [64] "polycystic kidney disease 1-related (PKD1-related) protein"
# Number of joined rows, i.e. merged_data entries whose best hit is a
# frontloaded gene
length(Biomin_FRONTs$definition)
## [1] 64
# Distinct Pocillopora gene IDs behind those rows
Biomin_FRONTs_names <- unique(Biomin_FRONTs$Gene)

length(Biomin_FRONTs_names)
## [1] 29
# select() is namespace-qualified, like the other dplyr calls in this file,
# so the table still prints if another attached package masks select()
Biomin_FRONTs %>%
  dplyr::select(Gene, `accessionnumber/geneID`, definition, Ref)
##                                          Gene accessionnumber/geneID
## 1   Pocillopora_acuta_HIv2___RNAseq.g13823.t1       aug_v2a.09809.t1
## 2   Pocillopora_acuta_HIv2___RNAseq.g13823.t1              P13_g6918
## 3   Pocillopora_acuta_HIv2___RNAseq.g13823.t1             PFX18785.1
## 4   Pocillopora_acuta_HIv2___RNAseq.g27976.t1             AAR13013.1
## 5   Pocillopora_acuta_HIv2___RNAseq.g27566.t1       aug_v2a.24015.t1
## 6   Pocillopora_acuta_HIv2___RNAseq.g27566.t1         XP_022788227.1
## 7   Pocillopora_acuta_HIv2___RNAseq.g15280.t1             AJQ31790.1
## 8    Pocillopora_acuta_HIv2___RNAseq.g8821.t1             PFX26597.1
## 9   Pocillopora_acuta_HIv2___RNAseq.g14653.t1               P8_g9654
## 10  Pocillopora_acuta_HIv2___RNAseq.g14653.t1         XP_022786918.1
## 11  Pocillopora_acuta_HIv2___RNAseq.g16280.t1             AGE35225.2
## 12  Pocillopora_acuta_HIv2___RNAseq.g16280.t1             Gene:g1484
## 13     Pocillopora_acuta_HIv2___TS.g23724.t1a             JR997000.1
## 14  Pocillopora_acuta_HIv2___RNAseq.g26037.t1          Gene:g5735.t1
## 15   Pocillopora_acuta_HIv2___RNAseq.g5013.t1             JR986059.1
## 16      Pocillopora_acuta_HIv2___TS.g11659.t1         XP_022809269.1
## 17      Pocillopora_acuta_HIv2___TS.g11659.t1         XP_022809270.1
## 18   Pocillopora_acuta_HIv2___RNAseq.g7402.t1         XP_022801463.1
## 19      Pocillopora_acuta_HIv2___TS.g12304.t1             ACE95141.1
## 20      Pocillopora_acuta_HIv2___TS.g12304.t1             EU532164.1
## 21      Pocillopora_acuta_HIv2___TS.g12304.t1         Gene:g29033.t1
## 22      Pocillopora_acuta_HIv2___TS.g12304.t1         Gene:g29034.t1
## 23   Pocillopora_acuta_HIv2___RNAseq.g5807.t1         XP_022803872.1
## 24  Pocillopora_acuta_HIv2___RNAseq.g21501.t1             AAD11470.1
## 25  Pocillopora_acuta_HIv2___RNAseq.g13824.t1            Gene:g27814
## 26  Pocillopora_acuta_HIv2___RNAseq.g25935.t1       aug_v2a.09968.t1
## 27  Pocillopora_acuta_HIv2___RNAseq.g25935.t1       aug_v2a.09969.t1
## 28  Pocillopora_acuta_HIv2___RNAseq.g25935.t1            Gene:g15955
## 29  Pocillopora_acuta_HIv2___RNAseq.g25935.t1             JR994474.1
## 30  Pocillopora_acuta_HIv2___RNAseq.g25935.t1             JT011118.1
## 31  Pocillopora_acuta_HIv2___RNAseq.g25935.t1              P20_g6066
## 32  Pocillopora_acuta_HIv2___RNAseq.g25935.t1              P34_g1714
## 33  Pocillopora_acuta_HIv2___RNAseq.g25935.t1             P36_g13890
## 34  Pocillopora_acuta_HIv2___RNAseq.g25935.t1         XP_022794736.1
## 35  Pocillopora_acuta_HIv2___RNAseq.g15517.t1         XP_022789932.1
## 36 Pocillopora_acuta_HIv2___RNAseq.g24861.t1b         XP_022807807.1
## 37  Pocillopora_acuta_HIv2___RNAseq.g27376.t1         XP_022806928.1
## 38  Pocillopora_acuta_HIv2___RNAseq.g16715.t1             P28_g11651
## 39  Pocillopora_acuta_HIv2___RNAseq.g26221.t1       aug_v2a.06122.t1
## 40  Pocillopora_acuta_HIv2___RNAseq.g26221.t1       aug_v2a.06123.t1
## 41  Pocillopora_acuta_HIv2___RNAseq.g26221.t1       aug_v2a.15580.t1
## 42  Pocillopora_acuta_HIv2___RNAseq.g26221.t1       aug_v2a.24512.t1
## 43  Pocillopora_acuta_HIv2___RNAseq.g26221.t1            Gene:g34749
## 44  Pocillopora_acuta_HIv2___RNAseq.g26221.t1             Gene:g7086
## 45  Pocillopora_acuta_HIv2___RNAseq.g26221.t1             JR980881.1
## 46  Pocillopora_acuta_HIv2___RNAseq.g26221.t1             P19_g20041
## 47  Pocillopora_acuta_HIv2___RNAseq.g26221.t1             P31_g20420
## 48  Pocillopora_acuta_HIv2___RNAseq.g26221.t1         XP_022804012.1
## 49   Pocillopora_acuta_HIv2___RNAseq.g4085.t1         XP_022798902.1
## 50   Pocillopora_acuta_HIv2___RNAseq.g6446.t1       aug_v2a.05945.t1
## 51   Pocillopora_acuta_HIv2___RNAseq.g6446.t1             Gene:g2829
## 52   Pocillopora_acuta_HIv2___RNAseq.g6446.t1          Gene:g2829.t1
## 53   Pocillopora_acuta_HIv2___RNAseq.g6446.t1              P14_g9951
## 54   Pocillopora_acuta_HIv2___RNAseq.g6446.t1              P3_g12510
## 55   Pocillopora_acuta_HIv2___RNAseq.g6446.t1              P5_g11674
## 56   Pocillopora_acuta_HIv2___RNAseq.g6446.t1         XP_022783415.1
## 57      Pocillopora_acuta_HIv2___TS.g15792.t1             P27_g18472
## 58      Pocillopora_acuta_HIv2___TS.g15792.t1             PFX30903.1
## 59  Pocillopora_acuta_HIv2___RNAseq.g28226.t2         XP_022810585.1
## 60  Pocillopora_acuta_HIv2___RNAseq.g18103.t1         XP_022803524.1
## 61      Pocillopora_acuta_HIv2___TS.g1545.t1b       aug_v2a.15064.t1
## 62      Pocillopora_acuta_HIv2___TS.g1545.t1b             JR982706.1
## 63  Pocillopora_acuta_HIv2___RNAseq.g16433.t1          aug_v2a.02830
## 64  Pocillopora_acuta_HIv2___RNAseq.g16433.t1       aug_v2a.02830.t1
##                                                                                         definition
## 1                                                                              Mucin4-like protein
## 2                                                                          Sushi domain-containing
## 3                                                                  Mucin-4 [Stylophora pistillata]
## 4                                           plasma membrane calcium ATPase [Stylophora pistillata]
## 5                                                                          Hephaestin-like protein
## 6                                                  hephaestin-like protein [Stylophora pistillata]
## 7                                     solute carrier family 4 member gamma [Stylophora pistillata]
## 8                                                            Complement C3 [Stylophora pistillata]
## 9                                                                               Major yolk protein
## 10                                      major yolk protein-like isoform X2 [Stylophora pistillata]
## 11                                                                   CARP1 [Stylophora pistillata]
## 12                                                                                Annotated: CARP1
## 13                                    Uncharacterized skeletal organic matrix protein-3  (USOMP-3)
## 14                                                                         Annotated: Tolloid-Like
## 15                                                                       Cephalotoxin-like protein
## 16            microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]
## 17            microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]
## 18                                              sodium bicarbonate cotransporter 3-like isoform X2
## 19                                                      carbonic anhydrase [Stylophora pistillata]
## 20                                                                            carbonic anhydrase 2
## 21                                                        Annotated: Carbonic Anhydrase (STPCA2-1)
## 22                                                                     Annotated: CarbonicAnhyrase
## 23                                    spore wall protein 2-like isoform X3 [Stylophora pistillata]
## 24                                                          L-type calcium channel alpha-1 subunit
## 25                                                        Annotated: carbonic anhydrase (STPCA2-2)
## 26                                                          MAM and LDLr domain-containing protein
## 27                                                          MAM and LDLr domain-containing protein
## 28                                  Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)
## 29                                               MAM and LDL-receptor domain- containing protein 2
## 30                                               MAM and LDL-receptor domain- containing protein 1
## 31                                                                       MAM domain anchor protein
## 32                                                      MAM/LDL receptor domain containing protein
## 33                                                                      Zonadhesion-like precursor
## 34           MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]
## 35                                       MAGUK p55 subfamily member 7-like [Stylophora pistillata]
## 36                                    uncharacterized protein LOC111344812 [Stylophora pistillata]
## 37                          SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]
## 38                                                                      Late embryogenesis protein
## 39                                                     EGF and laminin G domain-containing protein
## 40                                                     EGF and laminin G domain-containing protein
## 41                                                             Laminin G domain-containing protein
## 42                                                     EGF and laminin G domain-containing protein
## 43                                                    Annotated: EGF and LamininG-Like (EGF LamG2)
## 44                                                    Annotated: EGF and LamininG-Like (EGF LamG1)
## 45                                                     EGF and laminin G domain-containing protein
## 46                                                                    Contactin-associated protein
## 47                                                                                        Neurexin
## 48                        EGF and laminin G domain-containing protein-like [Stylophora pistillata]
## 49                 low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]
## 50                                                                 TSP-1 and VWA domain-containing
## 51                                         Annotated: Thrombospondin-like protein (Thrombospondin)
## 52                                                                            Annotated: Coadhesin
## 53                                                    clone g9951 alpha collagen-like protein gene
## 54                                                                                  Thrombospondin
## 55                                                                                      Hemicentin
## 56                                               coadhesin-like isoform X3 [Stylophora pistillata]
## 57                                                                                Integrin - alpha
## 58                                 hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]
## 59 von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]
## 60                                    digestive cysteine proteinase 1-like [Stylophora pistillata]
## 61                                                                                    Cystein-rich
## 62                                    Uncharacterized skeletal organic matrix protein-2  (USOMP-2)
## 63                                      polycystic kidney disease 1-related (PKD1-related) protein
## 64                                      polycystic kidney disease 1-related (PKD1-related) protein
##                          Ref
## 1      Takeuchi et al., 2016
## 2         Drake et al., 2013
## 3         Peled et al., 2020
## 4       Zoccola et al., 2004
## 5      Takeuchi et al., 2016
## 6         Peled et al., 2020
## 7       Zoccola et al., 2015
## 8         Peled et al., 2020
## 9         Drake et al., 2013
## 10        Peled et al., 2020
## 11         Mass et al., 2013
## 12 Mummadisetti et al., 2021
## 13  Ramos-Silva et al., 2013
## 14 Mummadisetti et al., 2021
## 15  Ramos-Silva et al., 2013
## 16        Peled et al., 2020
## 17        Peled et al., 2020
## 18      Zoccola et al., 2015
## 19         Moya et al., 2008
## 20     Bertucci et al., 2011
## 21 Mummadisetti et al., 2021
## 22 Mummadisetti et al., 2021
## 23        Peled et al., 2020
## 24      Zoccola et al., 1999
## 25 Mummadisetti et al., 2021
## 26     Takeuchi et al., 2016
## 27     Takeuchi et al., 2016
## 28 Mummadisetti et al., 2021
## 29  Ramos-Silva et al., 2013
## 30  Ramos-Silva et al., 2013
## 31        Drake et al., 2013
## 32        Drake et al., 2013
## 33        Drake et al., 2013
## 34        Peled et al., 2020
## 35        Peled et al., 2020
## 36        Peled et al., 2020
## 37        Peled et al., 2020
## 38        Drake et al., 2013
## 39     Takeuchi et al., 2016
## 40     Takeuchi et al., 2016
## 41     Takeuchi et al., 2016
## 42     Takeuchi et al., 2016
## 43 Mummadisetti et al., 2021
## 44 Mummadisetti et al., 2021
## 45  Ramos-Silva et al., 2013
## 46        Drake et al., 2013
## 47        Drake et al., 2013
## 48        Peled et al., 2020
## 49        Peled et al., 2020
## 50     Takeuchi et al., 2016
## 51 Mummadisetti et al., 2021
## 52 Mummadisetti et al., 2021
## 53        Drake et al., 2013
## 54        Drake et al., 2013
## 55        Drake et al., 2013
## 56        Peled et al., 2020
## 57        Drake et al., 2013
## 58        Peled et al., 2020
## 59        Peled et al., 2020
## 60        Peled et al., 2020
## 61     Takeuchi et al., 2016
## 62  Ramos-Silva et al., 2013
## 63     Takeuchi et al., 2016
## 64     Takeuchi et al., 2016

64 of the 172 Biomineralization Toolkit genes (64/172) are represented among the frontloaded genes.

These 64 entries correspond to 29 unique genes, some of which map to multiple Biomineralization genes, out of the 65 genes that match Biomineralization genes (29/65).

# Draw one Factor_ggmodelPlot() figure per unique gene in Biomin_FRONTs_names,
# using the gene ID as the plot title. print() is explicit because values are
# not auto-printed inside a for loop.
for (i in Biomin_FRONTs_names) {
  print(
    Factor_ggmodelPlot(
      results,
      geneName = i,
      x1var = "Treatment",
      x2var = "Origin",
      # TRUE rather than T: T is an ordinary variable that can be reassigned
      addBox = TRUE,
      xlab = "Treatment and Origin",
      title = i,
      colours = plotColours,
      lineColours = plotColours,
      modelColours = modColours,
      modelSize = 3
    )
  )
}

# Load the frontloaded-gene plotting table and drop its "X" column
# (presumably the row-index column added when the CSV was written; confirm).
READY <- read.csv(
  file = "../../../output/Slope_Base/frontloaded_genes_plotting.csv",
  sep = ",",
  header = TRUE
)
READY <- dplyr::select(READY, !c("X"))

# Row-wise flags used to colour the points:
#   "frontloaded" = yall above 1 and xall_1 below 1
#   biomineralization hit = Gene is listed in
#   merged_data$Pocillopora_acuta_best_hit
is_frontloaded <- READY$yall > 1 & READY$xall_1 < 1
is_biomin_hit <- READY$Gene %in% merged_data$Pocillopora_acuta_best_hit

# Later assignments override earlier ones, so the final colours are:
# gray (neither), black (frontloaded only), red (biomineralization hit),
# pink (frontloaded and biomineralization hit).
READY$color <- rep("gray", nrow(READY))
READY$color[is_frontloaded] <- "black"
READY$color[is_biomin_hit] <- "red"
READY$color[is_frontloaded & is_biomin_hit] <- "pink"

# Keep only genes whose two ratios are both below 6
READY_cutoff <- dplyr::filter(READY, yall < 6, xall_1 < 6)

# Points are drawn in three layers (instead of one geom_point over all rows)
# so that the red points, and then the pink points, sit on top of the rest.
pts_not_red <- dplyr::filter(READY_cutoff, color != "red")
pts_red <- dplyr::filter(READY_cutoff, color == "red")
pts_pink <- dplyr::filter(READY_cutoff, color == "pink")

P <- ggplot(READY_cutoff, aes(x = xall_1, y = yall)) +
  # Everything except red (this layer also contains the pink points)
  geom_point(data = pts_not_red, colour = pts_not_red$color, alpha = 0.8) +
  geom_point(data = pts_red, colour = pts_red$color, alpha = 0.8) +
  # Pink points are redrawn fully opaque as the top point layer
  geom_point(data = pts_pink, colour = pts_pink$color, alpha = 1) +
  stat_smooth(method = "lm", formula = y ~ x + poly(x, 2) - 1) +
  # Dotted guides at a ratio of 1 on each axis
  geom_vline(xintercept = 1, linetype = "dotted") +
  geom_hline(yintercept = 1, linetype = "dotted") +
  # Shade the x < 1 strip: lighter where y > 1, darker where y < 1
  annotate("rect", xmin = 0, xmax = 1, ymin = 1, ymax = 6.1, alpha = 0.2) +
  annotate("rect", xmin = 0, xmax = 1, ymin = 0, ymax = 1, alpha = 0.5) +
  scale_x_continuous(limits = c(0, 6.1), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 6.1), expand = c(0, 0)) +
  labs(
    title = "Frontloaded genes",
    x = "Flat to Slope (Conditioned to naive) foldchange ratio",
    y = "Flat to Slope (Conditioned to naive) control ratio"
  ) +
  theme_classic()

P

Output the list of frontloaded genes with their Biomineralization gene info

# Keep the merged_data rows whose Pocillopora_acuta_best_hit is one of the
# genes in Biomin_FRONTs_names. filter() is namespaced (as for READY_cutoff)
# so stats::filter or another package's filter() cannot be picked up, and the
# column is referenced by bare name rather than through merged_data$.
Biomin_FRONTs_info <- merged_data %>%
  dplyr::filter(Pocillopora_acuta_best_hit %in% Biomin_FRONTs_names)

# Write the table to CSV without a row-name column.
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(
  Biomin_FRONTs_info,
  "~/Documents/URI/Heron-Pdam-gene-expression/BioInf/output/Biomin_frontloaded.csv",
  row.names = FALSE
)